Required R libraries
# Install pacman if not already installed
if (!require("pacman")) install.packages("pacman")
## Indlæser krævet pakke: pacman
# Load all necessary packages using pacman for easy management
pacman::p_load(
"edgeR", "readr", "readxl", "magrittr", "tibble", "stringr",
"ggplot2", "data.table", "patchwork", "openxlsx", "GOplot",
"dplyr", "missForest", "RColorBrewer", "ggpubr", "tidyr",
"SummarizedExperiment", "GenomicRanges", "BiocGenerics",
"S4Vectors", "IRanges", "GenomeInfoDb", "Biobase", "pheatmap"
)
Chord Diagram on ORA (Over Representation Analysis) for Control vs Sham in the Right Atrium
# 1. Load GO ORA results
go_file <- "../../02_gene-enrichment/output/GO_ora.tsv.gz"
go_ora <- fread(go_file) # Read in the GO ORA results

# Expected regulation direction ("up"/"down") for every GO term of interest.
# The names of this list are the single source of truth for the selected terms.
go_term_direction <- list(
  "mitochondrial inner membrane" = "up",
  "calcium ion binding" = "down",
  "angiogenesis" = "down",
  "adherens junction" = "down",
  "protein phosphorylation" = "down"
)
selected_go_terms <- names(go_term_direction)

# 2. Keep the rows of contrast "AF_vs_sham_RA" whose term is selected and whose
#    Direction equals the direction expected for that term.
#    The comparison is vectorized (no rowwise()), and the result is a plain
#    tibble, so no row-wise grouping leaks into the objects derived from it.
filtered_go_ora <- go_ora %>%
  as_tibble() %>%
  filter(
    Contrast == "AF_vs_sham_RA",
    Description %in% selected_go_terms,
    Direction == unname(unlist(go_term_direction)[Description])
  )

# 3. Extract relevant gene IDs and GO terms
# geneID holds "/"-separated IDs; split them and keep each ID once.
gene_list <- unique(unlist(strsplit(filtered_go_ora$geneID, "/")))
go_terms <- filtered_go_ora$Description
# 4. Read the differential gene expression (DGE) results; they supply the
#    log fold-change (logFC) for the gene IDs taken from the GO ORA table.
dge_file <- "../../01_dge/output/dge_results.tsv.gz"
dge <- fread(dge_file)

# Restrict the DGE table to the contrast "AF_vs_sham_RA"
dge_filtered <- filter(dge, Contrast == "AF_vs_sham_RA")

# 5. Keep only genes present in gene_list, reduce to the three columns used
#    downstream, and drop rows whose gene name is missing, empty, "X" or "X1".
logFC_data <- dge_filtered %>%
  filter(ENSEMBL %in% gene_list) %>%
  select(ENSEMBL, GENENAME, logFC) %>%
  filter(!is.na(GENENAME), !GENENAME %in% c("X", "X1", ""))
# 6. Transform GO ORA data: convert ENSEMBL IDs to gene names in the "Genes" column
# A. Select and rename relevant columns to match the structure of EC$david
go_ora_transformed <- filtered_go_ora %>%
  select(Category = Database, ID, Term = Description, Genes = geneID, adj_pval = p.adjust)

# B. Lookup table: names are ENSEMBL IDs, values are gene names (from logFC_data)
ensembl_to_genename <- setNames(logFC_data$GENENAME, logFC_data$ENSEMBL)

# C. For every term, split the "/"-separated ENSEMBL IDs, translate them through
#    the lookup table, drop IDs without a gene name, and collapse the result
#    into one ", "-separated string.
#    vapply() guarantees a character vector even when there are zero rows
#    (sapply() would return an empty list in that case), and no temporary list
#    columns have to be added to and removed from the table.
go_ora_transformed$Genes <- vapply(
  strsplit(go_ora_transformed$Genes, "/", fixed = TRUE),
  function(ensembl_ids) {
    gene_names <- ensembl_to_genename[ensembl_ids]
    paste(gene_names[!is.na(gene_names)], collapse = ", ")
  },
  character(1),
  USE.NAMES = FALSE
)
# 7. Two-column gene table (ID = gene name, logFC), the layout of EC$genes
logFC_data_transformed <- select(logFC_data, ID = GENENAME, logFC)

# 8. Build the circ object from the GO term table and the gene table
circ <- circle_dat(go_ora_transformed, logFC_data_transformed)

# 9. Build the chord matrix for the selected genes and GO terms
chord_data <- chord_dat(
  data = circ,
  genes = logFC_data_transformed$ID,
  process = go_ora_transformed$Term
)

# A. Append a logFC column, looked up by the row names of the chord matrix
chord_data <- cbind(
  chord_data,
  logFC = logFC_data_transformed$logFC[
    match(rownames(chord_data), logFC_data_transformed$ID)
  ]
)

# 10. Draw the chord diagram, genes ordered by logFC
GOChord(chord_data, gene.order = "logFC", gene.size = 5, space = 0.02)
## Warning: Using size for a discrete variable is not advised.
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Generate heatmap for chord data.
# chord_data carries one trailing logFC column (appended with cbind() above),
# so GOHeat() has to be told via nlfc = 1. With the default (nlfc = 0) the
# logFC column is processed like a term-membership column, which is what
# produced the long run of "NAs introduced by coercion" warnings in earlier
# renders of this chunk.
GOHeat(chord_data, nlfc = 1)

# Custom color palette for processes (ribbons), keyed by GO term
custom_colors <- c(
  "mitochondrial inner membrane" = "#62b3e5", # blue
  "calcium ion binding" = "#0db14a", # green
  "angiogenesis" = "#8b2289", # purple
  "adherens junction" = "#b0373d", # red
  "protein phosphorylation" = "#fcb985" # orange
)

# Generate and plot the chord diagram with custom ribbon colors.
# The palette is indexed by the process columns of chord_data (every column
# except logFC) so that each term receives the color listed for it above,
# whatever order the terms have in the ORA table.
# NOTE(review): GOChord() appears to apply ribbon.col by position rather than
# by name - confirm against the GOplot source.
GOChord(
  chord_data,
  space = 0.02,
  gene.order = "logFC",
  gene.size = 5,
  ribbon.col = unname(custom_colors[setdiff(colnames(chord_data), "logFC")])
)
## Warning: Using size for a discrete variable is not advised.
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Define relevant genes for the Chord-Diagram
# 1. Manually curated gene list (the duplicate "NDUFA12" entry was removed; the
#    list is only used for membership tests, so this does not change the result)
custom_gene_list <- c(
  "COX7B", "COX4I1", "NDUFA12", "NDUFS2", "NDUFS3",
  "CYC1", "LDHB", "SDHD", "NDUFA6", "NRP1", "NRP2", "AMOT",
  "FGF1", "AMOTL2", "CCN2", "AK1", "PKP2", "TJP1", "JUP",
  "VCL", "CTNND1", "ITPR3", "ITPR1", "PLCB2", "PLCB3",
  "MYLK", "MYLK3", "CAMK1D", "SGK1", "PRKG1"
)

# 2. Filter logFC_data to match only the genes in the custom gene list
logFC_data_custom <- logFC_data %>%
  filter(GENENAME %in% custom_gene_list) %>%
  select(ID = GENENAME, logFC)

# 3. Keep the GO terms that contain at least one custom gene.
#    Genes is a ", "-separated string; it is split and compared symbol by
#    symbol. A regular-expression search over the whole string would also hit
#    substrings of longer symbols (e.g. "AK1" inside "PAK1").
go_ora_custom <- go_ora_transformed %>%
  filter(
    vapply(
      strsplit(Genes, ", ", fixed = TRUE),
      function(term_genes) any(term_genes %in% custom_gene_list),
      logical(1)
    )
  )

# 4. Prepare the circular data object using circle_dat with the custom gene list
circ_custom <- circle_dat(go_ora_custom, logFC_data_custom)

# 5. Prepare the chord plot data using chord_dat
chord_data_custom <- chord_dat(
  data = circ_custom,
  genes = logFC_data_custom$ID,
  process = go_ora_custom$Term
)

# 6. Append a logFC column, looked up by the row names of the chord matrix
chord_data_custom <- cbind(
  chord_data_custom,
  logFC = logFC_data_custom$logFC[
    match(rownames(chord_data_custom), logFC_data_custom$ID)
  ]
)

# 7. Ribbon palette, one color per GO term column (adjust as desired)
custom_colors <- c("#264653", "#2A9D8F", "#E9C46A", "#F4A261", "#E76F51")

# 8. Generate the chord diagram for custom genes
GOChord(
  chord_data_custom,
  space = 0.04,
  gene.order = "logFC",
  gene.size = 10,
  gene.space = 0.5,
  ribbon.col = custom_colors,
  nlfc = 1 # chord_data_custom has exactly one logFC column
)
## Warning: Using size for a discrete variable is not advised.
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Adjust color palette for visual appeal.
# Only this blue -> yellow -> red ordering is used; the reversed palette that
# used to be assigned immediately before it was overwritten and never read.
custom_colors <- c("#4575b4", "#91bfdb", "#fee090", "#fc8d59", "#d73027")

# Generate the final chord diagram with adjusted settings
GOChord(
  chord_data_custom,
  space = 0.04,
  gene.order = "logFC",
  gene.size = 4,
  gene.space = 0.3,
  ribbon.col = custom_colors,
  nlfc = 1,
  # In vector order: dark red, off-white, dark blue.
  # NOTE(review): confirm in the GOplot docs/source which end of lfc.col is
  # mapped to high logFC values.
  lfc.col = c("#67000d", "#f7f7f7", "#08306b"),
  lfc.min = -2, # Minimum value on the logFC scale
  lfc.max = 2 # Maximum value on the logFC scale
)
## Warning: Using size for a discrete variable is not advised.
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

Chord Diagram on ORA (Over Representation Analysis) for Metformin vs Control in the Right Atrium
# 1. Load GO ORA results
go_file <- "../../02_gene-enrichment/output/GO_ora.tsv.gz"
go_ora <- fread(go_file) # Read in the GO ORA results

# 2. Expected regulation direction ("up"/"down") for every GO term of interest.
#    The names of this list are the single source of truth for the selected terms.
go_term_direction <- list(
  "oxidative phosphorylation" = "down",
  "glycolytic process" = "down",
  "chromatin remodeling" = "up",
  "adherens junction" = "up",
  "protein kinase activity" = "up"
)
selected_go_terms <- names(go_term_direction)

# 3. Keep the rows of contrast "met_vs_placebo_RA" whose term is selected and
#    whose Direction equals the direction expected for that term.
#    The comparison is vectorized (no rowwise()), and the result is a plain
#    tibble, so no row-wise grouping leaks into the objects derived from it.
filtered_go_ora <- go_ora %>%
  as_tibble() %>%
  filter(
    Contrast == "met_vs_placebo_RA",
    Description %in% selected_go_terms,
    Direction == unname(unlist(go_term_direction)[Description])
  )

# 4. Extract relevant gene IDs and GO terms
# geneID holds "/"-separated IDs; split them and keep each ID once.
gene_list <- unique(unlist(strsplit(filtered_go_ora$geneID, "/")))
go_terms <- filtered_go_ora$Description
# 5. Read the differential gene expression (DGE) results; they supply the
#    log fold-change (logFC) for the gene IDs taken from the GO ORA table.
dge_file <- "../../01_dge/output/dge_results.tsv.gz"
dge <- fread(dge_file)

# 6. Restrict the DGE table to the contrast "met_vs_placebo_RA"
dge_filtered <- filter(dge, Contrast == "met_vs_placebo_RA")

# 7. Keep only genes present in gene_list, reduce to the three columns used
#    downstream, and drop rows whose gene name is missing, empty, "X" or "X1".
logFC_data <- dge_filtered %>%
  filter(ENSEMBL %in% gene_list) %>%
  select(ENSEMBL, GENENAME, logFC) %>%
  filter(!is.na(GENENAME), !GENENAME %in% c("X", "X1", ""))
# 8. Transform GO ORA data: convert ENSEMBL IDs to gene names in the "Genes" column
# A. Select and rename relevant columns to match the structure of EC$david
go_ora_transformed <- filtered_go_ora %>%
  select(Category = Database, ID, Term = Description, Genes = geneID, adj_pval = p.adjust)

# B. Lookup table: names are ENSEMBL IDs, values are gene names (from logFC_data)
ensembl_to_genename <- setNames(logFC_data$GENENAME, logFC_data$ENSEMBL)

# C. For every term, split the "/"-separated ENSEMBL IDs, translate them through
#    the lookup table, drop IDs without a gene name, and collapse the result
#    into one ", "-separated string.
#    vapply() guarantees a character vector even when there are zero rows
#    (sapply() would return an empty list in that case), and no temporary list
#    columns have to be added to and removed from the table.
go_ora_transformed$Genes <- vapply(
  strsplit(go_ora_transformed$Genes, "/", fixed = TRUE),
  function(ensembl_ids) {
    gene_names <- ensembl_to_genename[ensembl_ids]
    paste(gene_names[!is.na(gene_names)], collapse = ", ")
  },
  character(1),
  USE.NAMES = FALSE
)
# 9. Two-column gene table (ID = gene name, logFC), the layout of EC$genes
logFC_data_transformed <- select(logFC_data, ID = GENENAME, logFC)

# 10. Build the circ object from the GO term table and the gene table
circ <- circle_dat(go_ora_transformed, logFC_data_transformed)

# 11. Build the chord matrix for the selected genes and GO terms
chord_data <- chord_dat(
  data = circ,
  genes = logFC_data_transformed$ID,
  process = go_ora_transformed$Term
)

# A. Append a logFC column, looked up by the row names of the chord matrix
chord_data <- cbind(
  chord_data,
  logFC = logFC_data_transformed$logFC[
    match(rownames(chord_data), logFC_data_transformed$ID)
  ]
)

# 12. Draw the chord diagram, genes ordered by logFC
GOChord(chord_data, gene.order = "logFC", gene.size = 5, space = 0.02)
## Warning: Using size for a discrete variable is not advised.
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Custom color palette for processes (ribbons), keyed by GO term
custom_colors <- c(
  "oxidative phosphorylation" = "#62b3e5", # blue
  "glycolytic process" = "#0db14a", # green
  "chromatin remodeling" = "#8b2289", # purple
  "adherens junction" = "#b0373d", # red
  "protein kinase activity" = "#fcb985" # orange
)

# Generate and plot the chord diagram with custom ribbon colors.
# The palette is indexed by the process columns of chord_data (every column
# except logFC) so that each term receives the color listed for it above,
# whatever order the terms have in the ORA table.
# NOTE(review): GOChord() appears to apply ribbon.col by position rather than
# by name - confirm against the GOplot source.
GOChord(
  chord_data,
  space = 0.02,
  gene.order = "logFC",
  gene.size = 5,
  ribbon.col = unname(custom_colors[setdiff(colnames(chord_data), "logFC")])
)
## Warning: Using size for a discrete variable is not advised.
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Define relevant genes for a Chord-Diagram
# 1. Manually curated gene list (the duplicate "PFKL" and "JCAD" entries were
#    removed; the list is only used for membership tests, so this does not
#    change the result)
custom_gene_list <- c(
  "CHCHD10", "COX7B", "COX4I1", "COX6A1", "PFKL", "ALDOA", "GAPDH",
  "SETD7", "SETBP1", "SETD5", "NSD1", "PRKAA2", "PRKAA1",
  "BMPR2", "CAMKK2", "MTOR", "PRKACB", "CTNNA3", "TJP1",
  "PKP2", "JCAD", "DSP", "CDH2", "DSC2", "GCK",
  "TAF1", "RPS6KA5", "PRKCA"
)

# 2. Filter logFC_data to match only the genes in the custom gene list
logFC_data_custom <- logFC_data %>%
  filter(GENENAME %in% custom_gene_list) %>%
  select(ID = GENENAME, logFC)

# 3. Keep the GO terms that contain at least one custom gene.
#    Genes is a ", "-separated string; it is split and compared symbol by
#    symbol. A regular-expression search over the whole string would also hit
#    substrings of longer symbols (e.g. "MTOR" inside "LAMTOR1", "GCK" inside
#    "GCKR").
go_ora_custom <- go_ora_transformed %>%
  filter(
    vapply(
      strsplit(Genes, ", ", fixed = TRUE),
      function(term_genes) any(term_genes %in% custom_gene_list),
      logical(1)
    )
  )

# 4. Prepare the circular data object using circle_dat with the custom gene list
circ_custom <- circle_dat(go_ora_custom, logFC_data_custom)

# 5. Prepare the chord plot data using chord_dat
chord_data_custom <- chord_dat(
  data = circ_custom,
  genes = logFC_data_custom$ID,
  process = go_ora_custom$Term
)

# 6. Append a logFC column, looked up by the row names of the chord matrix
chord_data_custom <- cbind(
  chord_data_custom,
  logFC = logFC_data_custom$logFC[
    match(rownames(chord_data_custom), logFC_data_custom$ID)
  ]
)

# 7. Ribbon palette, one color per GO term column.
#    Only this blue -> yellow -> red palette is used; the palette that used to
#    be assigned immediately before it was overwritten and never read.
custom_colors <- c("#4575b4", "#91bfdb", "#fee090", "#fc8d59", "#d73027")

# 8. Generate the chord diagram for custom genes
GOChord(
  chord_data_custom,
  space = 0.04,
  gene.order = "logFC",
  gene.size = 10,
  gene.space = 0.5,
  ribbon.col = custom_colors,
  nlfc = 1 # chord_data_custom has exactly one logFC column
)
## Warning: Using size for a discrete variable is not advised.
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
